" Unit 4 - Lecture 1 "
"------------------------------------------------------------------------"

" Linear Regression "

"------------------------------------------------------------------------"

" Building the Model "
"

<var> = lm(formula)

formula = <dep var> ~ <independent var> "

"------------------------------------------------------------------------"

"
Problem:
Load the inbuilt dataset mtcars.

Predict the variable 'mpg' using
all the other variables available in
the model.

Comment on the characteristics of the
model built.

"
"
Question:
- Identify Y and Xi's
"

" Building the Model "

" Creating a Model with One Regressor "

# Form 1: bare column names in the formula, looked up in `data`.
model <- lm(formula = mpg ~ wt, data = mtcars)

" OR "

# Form 2: the columns are extracted with `$` inside the formula itself.
# This assignment replaces the fit stored by form 1.
model <- lm(formula = mtcars$mpg ~ mtcars$wt)

"
Both of the calls above fit the same
linear regression model. Prefer the data = form:
predict() can only use newdata with that form.
"

"------------------------------------------------------------------------"


" summary(model) provides R^2, Adj R^2,
  Global Testing, 
  Tests for Individual Regression Coefficients
"

# Print the summary of whichever fit is currently stored in `model`.
summary(model)

"
Understand and Interpret each and every
component available in the output.
"

"------------------------------------------------------------------------"

" Making Prediction(s) "

"
Output for fitted.values / predict (if new wt given)
mpg = 37.285 - 5.344 * wt

"

" Manual "


" Using predict() Function "

# Syntax (placeholders, not runnable R):
#   predict(<model>,
#           newdata = <Xs>)
#
# `newdata` is a data frame whose column names match the regressors used
# in the model formula.

# Worked example. The model is fitted with `data =` so that predict()
# can look up `wt` in `newdata`.
fit.wt <- lm(mpg ~ wt, data = mtcars)
new.wt <- 3

# Manual prediction: plug the new weight into mpg = b0 + b1 * wt.
manual.pred <- unname(coef(fit.wt)["(Intercept)"] +
                        coef(fit.wt)["wt"] * new.wt)
manual.pred

# The same prediction through predict().
auto.pred <- predict(fit.wt, newdata = data.frame(wt = new.wt))
auto.pred


"------------------------------------------------------------------------"

" Global Testing "
"
H0: B1 = B2 = ... = Bk = 0
v/s
H1: At least one Bi != 0

"

# Analysis-of-variance table for whichever fit is currently stored in
# `model`. For a model with a single regressor, the F test in this table
# is the global test of H0 stated above.
anova(model)


"------------------------------------------------------------------------"

" Working with Categorical Variables "

# Column types before the conversion.
str(object = mtcars)

# Store `am` as a factor so that it is handled as a categorical variable.
mtcars[["am"]] <- as.factor(mtcars[["am"]])

# Column types after the conversion: `am` is now a factor.
str(object = mtcars)


"------------------------------------------------------------------------"

" Model with only Intercept term / Null Model "
"
Y = B0
"
# Intercept-only fit: the right-hand side `1` keeps just the constant term.
model <- lm(formula = mpg ~ 1, data = mtcars)

# ANOVA table of the intercept-only fit.
anova(object = model)

" Model with NO Intercept term "
"
Y = B1 * X
"

# `0 +` removes the intercept, so the fit is Y = B1 * X. The regressor is
# `wt` because that is the variable on the x axis of the plot below.
model <- lm(mpg ~ 0 + wt, data = mtcars)

# The axis limits include the origin as well as the observed mpg values,
# so both the data points and the line through (0, 0) are visible.
plot(mtcars$wt,
     mtcars$mpg,
     xlim = c(-5, 10),
     ylim = c(-5, 35))

# Fitted line. For a fit with a single coefficient, abline() takes that
# coefficient as the slope of a line through the origin.
abline(model,
       col = "red")

# Reference line y = x.
abline(0, 1,
       col = "blue")

# Coordinate axes through the origin.
abline(h = 0,
       v = 0,
       col = "yellow")

" Model with all regressors "
# Template (placeholders, not runnable R):
#   model = lm(<target> ~ .,
#              data = <data frame>)
#
# `.` on the right-hand side stands for every column of `data` other
# than the response.
model <- lm(mpg ~ .,
            data = mtcars)


" Model with one regressor "
# Template (placeholders, not runnable R):
#   model = lm(<target> ~ <reg_one>,
#              data = <data frame>)


" Model with two regressors "
# Template (placeholders, not runnable R):
#   model = lm(<target> ~ <reg_one> + <reg_two>,
#              data = <data frame>)

# Further regressors are appended with `+` in the same way; this example
# uses three.
model <- lm(mpg ~ wt + cyl + disp,
            data = mtcars)


" Model with all regressors except one "

# `. - cyl`: every column other than the response, with `cyl` removed.
model <- lm(formula = mpg ~ . - cyl, data = mtcars)


"------------------------------------------------------------------------"

" Plotting the Regression Line "

# Fit first; the scatter plot itself does not depend on the model.
model <- lm(formula = mpg ~ wt, data = mtcars)

# Scatter plot of mpg against wt.
plot(x = mtcars$wt, y = mtcars$mpg)

# Overlay the fitted line on the open plot.
abline(model, col = "red")



"------------------------------------------------------------------------"

" Assumptions Testing "
"
1.) Homoscedasticity of errors
2.) No Autocorrelation among residuals
3.) Relationship b/w 'Y' & 'Xi' is linear
4.) Errors ~ Normal

"


"------------------------------------------------------------------------"

" R^2 v/s Adj.R^2 "

# Diagnostic plots for whichever fit is currently stored in `model`,
# used to check the assumptions listed above.
plot(model)
# Residuals of the same fit, plotted in observation order.
plot(model$residuals)

# Response against regressor, to judge whether the relationship is linear.
plot(mtcars$wt,
     mtcars$mpg)

# Regressor grid 0, 0.5, ..., 100 and an exponential response.
X <- seq(from = 0, to = 100, by = 0.5)
Y <- exp(X)

# Straight-line fit to the exponential data.
model.new <- lm(formula = Y ~ X)

# Data with the fitted line on top.
plot(x = X, y = Y)
abline(model.new, col = "red")

# Residuals of the straight-line fit, in observation order.
plot(model.new$residuals)



# Two pure-noise columns. They must exist before `model.comp.2` is
# fitted, otherwise lm() stops with "object 'garbage' not found".
mtcars$garbage <- rnorm(nrow(mtcars), 100, 25)
mtcars$garbage2 <- rnorm(nrow(mtcars), 100, 25)

# Nested models: one regressor, two regressors, and two regressors plus
# the two noise columns.
model <- lm(mpg ~ wt, mtcars)
model.comp <- lm(mpg ~ wt + hp, mtcars)
model.comp.2 <- lm(mpg ~ wt + hp +
                     garbage + garbage2, mtcars)

anova(model)
anova(model.comp)
anova(model.comp.2)

# Compare R^2 and adjusted R^2 across the three fits. R^2 cannot decrease
# when regressors are added, whereas adjusted R^2 can.
summary(model)
summary(model.comp)
summary(model.comp.2)


"------------------------------------------------------------------------"

" Question "
" Import the data Body Fat.

1.) Fit a Regression model containing only the
    Intercept 

2.) Fit a Regression Model Containing
    all explanatory variables.
    Comment on the Accuracy.

3.) Verify the assumptions.

4.) Add a variable 'Group' in the data set,
    such that
    Age < 18 : Band 1
    Age >= 18 & Age <= 36 : Band 2
    Else : Band 3
    
    and remove the Age column from the data set.

5.) Fit a new model and comment

"

"
Adj.R^2 = 1 - (MSSres / MSST)
"


# Pick the Body Fat CSV interactively. The code below uses its `Density`
# and `Age` columns.
Data <- read.csv(file.choose())

"1."

# Intercept-only model.
model.1 <- lm(Density ~ 1, Data)

"2."

# All explanatory variables; `Age` is still in the data at this point.
model.2 <- lm(Density ~ ., Data)

summary(model.2)


"3."

# Refit without rows 96, 182, 48 and 76, then inspect the residuals.
model.2 <- lm(Density ~ .,
              Data[-c(96, 182, 48, 76), ])

# which = 2 selects the normal Q-Q plot of the residuals.
plot(model.2, which = 2)
plot(model.2$residuals)

# This plot needs Data$Age, so it has to run before Age is removed in
# step 4.
plot(Data$Age,
     Data$Density)

"4."

# Age < 18 -> "Band 1"; 18 <= Age <= 36 -> "Band 2"; otherwise "Band 3".
Data$Group <- ifelse(Data$Age < 18, "Band 1",
                     ifelse(Data$Age >= 18 &
                              Data$Age <= 36, "Band 2",
                            "Band 3"))

# Drop the original Age column now that Group replaces it.
Data$Age <- NULL

"5."

# New model: Group takes the place of Age among the regressors.
model.3 <- lm(Density ~ ., Data)

summary(model.3)

"------------------------------------------------------------------------"


" Confidence Interval For Beta's "

# Syntax (placeholder, not runnable R):
#   confint(<model>)

model <- lm(mpg ~ ., mtcars)
summary(model)

# Confidence intervals for every coefficient (95% level by default).
confint(model)


"------------------------------------------------------------------------"

" Feature Selection "

" Forward Selection "

"
Candidate regressors from mtcars: disp, wt, qsec
"

"Step 0"
# Start from the intercept-only model.
model0 <- lm(mpg ~ 1, mtcars)

"Step 1"
# Try each candidate on its own and compare adjusted R^2. The component
# is spelled out in full (`adj.r.squared`) rather than relying on partial
# matching of `$adj.r`.
model1 <- lm(mpg ~ wt, mtcars)
summary(model1)$adj.r.squared

model1 <- lm(mpg ~ disp, mtcars)
summary(model1)$adj.r.squared

model1 <- lm(mpg ~ qsec, mtcars)
summary(model1)$adj.r.squared


# `wt` is the regressor carried forward from step 1.
model1.final <- lm(mpg ~ wt, mtcars)
summary(model1.final)$adj.r.squared


"Step 2"
# Add each remaining candidate to the step-1 model in turn.
model2 <- lm(mpg ~ wt + disp, mtcars)
summary(model2)$adj.r.squared

model2 <- lm(mpg ~ wt + qsec, mtcars)
summary(model2)$adj.r.squared

# `qsec` is the regressor carried forward from step 2.
model2.final <- lm(mpg ~ wt + qsec, mtcars)
summary(model2.final)$adj.r.squared


"Step 3"

# Only `disp` is left to try.
model3 <- lm(mpg ~ wt + qsec + disp, mtcars)
summary(model3)$adj.r.squared


"Alternative"

"Step 0"
# Same forward selection, written with update(): `. ~ . + x` keeps the
# existing formula and adds the regressor `x`.
model0 <- lm(mpg ~ 1, mtcars)

"Step 1"
# Adjusted R^2 is read with its full name (`adj.r.squared`) rather than
# relying on partial matching of `$adj.r`.
model1 <- update(model0, . ~ . + wt)
summary(model1)$adj.r.squared

model1 <- update(model0, . ~ . + disp)
summary(model1)$adj.r.squared

model1 <- update(model0, . ~ . + qsec)
summary(model1)$adj.r.squared


# `wt` is the regressor carried forward from step 1.
model1.final <- update(model0, . ~ . + wt)
summary(model1.final)$adj.r.squared


"Step 2"
model2 <- update(model1.final, . ~ . + disp)
summary(model2)$adj.r.squared

model2 <- update(model1.final, . ~ . + qsec)
summary(model2)$adj.r.squared

# `qsec` is the regressor carried forward from step 2.
model2.final <- update(model1.final, . ~ . + qsec)
summary(model2.final)$adj.r.squared


"Step 3"

# Only `disp` is left to try.
model3 <- update(model2.final, . ~ . + disp)
summary(model3)$adj.r.squared





" Backward Selection "

"Step 0 - Full Model"
# Full model with all three candidate regressors. Adjusted R^2 is read
# with its full name (`adj.r.squared`) rather than relying on partial
# matching of `$adj.r`.
model0 <- lm(mpg ~ qsec + wt + disp, mtcars)
summary(model0)$adj.r.squared

"Step 1"
# Remove each regressor in turn: `. ~ . - x` drops `x` from the formula.
model1 <- update(model0, . ~ . - wt)
summary(model1)$adj.r.squared

model1 <- update(model0, . ~ . - disp)
summary(model1)$adj.r.squared

model1 <- update(model0, . ~ . - qsec)
summary(model1)$adj.r.squared


# `disp` is the regressor removed in step 1.
model1.final <- update(model0, . ~ . - disp)
summary(model1.final)$adj.r.squared


"Step 2"
model2 <- update(model1.final, . ~ . - wt)
summary(model2)$adj.r.squared

model2 <- update(model1.final, . ~ . - qsec)
summary(model2)$adj.r.squared

# Nothing further is removed: the step-1 model is kept as the final model.
model2.final <- model1.final
summary(model2.final)$adj.r.squared


"------------------------------------------------------------------------"

" Generalized Linear Models "

" Two components.
  Family: Distribution of Y
  Link function "



"
Few Family:
Gamma()
gaussian()
poisson()
binomial() 


Few Link Functions:
log
identity
inverse

"

" Plot a Density chart to see the shape of the 
  distribution (cont. distribution) "

# Kernel density estimate of the response `mpg`, plotted to judge the
# shape of its distribution.
plot(density(mtcars$mpg))


# glm() with the default family. `data` must be passed by name: the
# second positional argument of glm() is `family`, so glm(mpg ~ ., mtcars)
# stops with "'family' not recognized".
model <- glm(mpg ~ .,
             data = mtcars)
model

# The same fit with the gaussian family spelled out.
model <- glm(mpg ~ .,
             data = mtcars,
             family = gaussian())
model

# Ordinary linear model for comparison with the two glm() fits above.
model <- lm(mpg ~ .,
            data = mtcars)
model


"------------------------------------------------------------------------"

" Introducing AIC "
" AIC: Akaike's Information Criterion "

" Know the rules about AIC "
"
- Lower the AIC, the better it is
- AIC can be < 0
"
"------------------------------------------------------------------------"

# Gamma-family fits of increasing size, from intercept-only up to every
# column currently in mtcars.
model0 <- glm(formula = mpg ~ 1, family = Gamma(), data = mtcars)

model1 <- glm(formula = mpg ~ wt, family = Gamma(), data = mtcars)

model2 <- glm(formula = mpg ~ wt + qsec, family = Gamma(), data = mtcars)

model3 <- glm(formula = mpg ~ wt + qsec + disp,
              family = Gamma(),
              data = mtcars)

model4 <- glm(formula = mpg ~ ., family = Gamma(), data = mtcars)

# Two uniform-noise columns, added after model4 so that `.` above does
# not pick them up.
mtcars$G1 <- runif(n = 32, min = -10000, max = 10000)
mtcars$G2 <- runif(n = 32, min = -10000, max = 10000)

model5 <- glm(formula = mpg ~ wt + qsec + G1 + G2,
              family = Gamma(),
              data = mtcars)

# AIC of each fit, in the order the models were built.
AIC(model0)
AIC(model1)
AIC(model2)
AIC(model3)
AIC(model4)
AIC(model5)
